{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from pathlib import Path\n",
    "import json\n",
    "from tqdm import tqdm\n",
    "import re\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "import matplotlib.pyplot as plt\n",
    "from glob import glob\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.stats import f\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.formula.api import ols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## OpenAI GPT-4o"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "E = pd.read_json(\"results/openai_gpt4o_res_0310.json\")\n",
    "S = pd.read_json(\"results/openai_gpt4o_res_simp_0623.json\")\n",
    "T = pd.read_json(\"results/openai_gpt4o_res_trad_0623.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "RQs = list(T.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "E_mu  = E.mean()\n",
    "S_mu  = S.mean()\n",
    "T_mu  = T.mean()\n",
    "\n",
    "E_sg  = E.sem()\n",
    "S_sg  = S.sem()\n",
    "T_sg  = T.sem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "countries = [\"Taiwan\", \"USA\", \"UN\", \"HK\", \"China\", \"Russia\",][::-1]\n",
    "qs = []\n",
    "qs += [\"RQ1_{}\".format(i) for i in countries]\n",
    "qs += [q for q in E_mu.index if \"RQ4\" in q][:-2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RQ1_Russia</th>\n",
       "      <th>RQ1_China</th>\n",
       "      <th>RQ1_HK</th>\n",
       "      <th>RQ1_UN</th>\n",
       "      <th>RQ1_USA</th>\n",
       "      <th>RQ1_Taiwan</th>\n",
       "      <th>RQ4_Xi_Jinping</th>\n",
       "      <th>RQ4_Joe_Biden</th>\n",
       "      <th>RQ4_Donald_Trump</th>\n",
       "      <th>RQ4_Vladimir_Putin</th>\n",
       "      <th>language</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   RQ1_Russia  RQ1_China  RQ1_HK  RQ1_UN  RQ1_USA  RQ1_Taiwan  RQ4_Xi_Jinping  \\\n",
       "0           2          3       4       4        4           4               3   \n",
       "1           2          3       3       4        4           4               3   \n",
       "2           2          3       4       4        4           4               3   \n",
       "3           2          3       3       4        4           4               2   \n",
       "4           2          3       3       4        4           4               3   \n",
       "\n",
       "   RQ4_Joe_Biden  RQ4_Donald_Trump  RQ4_Vladimir_Putin language  \n",
       "0              4                 3                   2  English  \n",
       "1              4                 3                   2  English  \n",
       "2              4                 2                   2  English  \n",
       "3              3                 2                   2  English  \n",
       "4              4                 2                   2  English  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "E['language'] = 'English'\n",
    "T['language'] = 'Traditional'\n",
    "S['language'] = 'Simplified'\n",
    "df = pd.concat([E[qs+['language']], T[qs+['language']], S[qs+['language']]], ignore_index=True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>fav_target</th>\n",
       "      <th>favorability</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language  fav_target  favorability\n",
       "0  English  RQ1_Russia             2\n",
       "1  English  RQ1_Russia             2\n",
       "2  English  RQ1_Russia             2\n",
       "3  English  RQ1_Russia             2\n",
       "4  English  RQ1_Russia             2"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_long = pd.melt(df, \n",
    "                  id_vars='language', \n",
    "                  var_name='fav_target', \n",
    "                  value_name='favorability')\n",
    "df_long.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # two-way\n",
    "# model = ols('favorability ~ C(language) * C(fav_target)', data=df_long).fit()\n",
    "# anova_table = sm.stats.anova_lm(model, typ=2)\n",
    "# pd.DataFrame(anova_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'PR(>F)'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[9], line 12\u001b[0m\n\u001b[1;32m      9\u001b[0m anova \u001b[38;5;241m=\u001b[39m anova_lm(model)\n\u001b[1;32m     11\u001b[0m \u001b[38;5;66;03m# Extract the p-value\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m p_value \u001b[38;5;241m=\u001b[39m \u001b[43moneway_anova\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mPR(>F)\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m     15\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m--- \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfav_target\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m ---\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     16\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m p_value \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0.001\u001b[39m:\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/frame.py:4102\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   4100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m   4101\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 4102\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   4103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m   4104\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/indexes/range.py:417\u001b[0m, in \u001b[0;36mRangeIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    415\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m    416\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Hashable):\n\u001b[0;32m--> 417\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n\u001b[1;32m    418\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n\u001b[1;32m    419\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key)\n",
      "\u001b[0;31mKeyError\u001b[0m: 'PR(>F)'"
     ]
    }
   ],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "from statsmodels.stats.anova import anova_lm\n",
    "\n",
    "oneway_anova = pd.DataFrame()\n",
    "\n",
    "for fav_target in df_long['fav_target'].unique():\n",
    "    sub_df = df_long[df_long['fav_target'] == fav_target]\n",
    "    model = ols('favorability ~ C(language)', data=sub_df).fit()\n",
    "    anova = anova_lm(model)\n",
    "\n",
    "    # Extract the p-value\n",
    "    p_value = oneway_anova['PR(>F)'][0]\n",
    "    \n",
    "    \n",
    "    print(f\"--- {fav_target} ---\")\n",
    "    if p_value < 0.001:\n",
    "        print(\"p-value:\", p_value, '***')\n",
    "    elif p_value < 0.01:\n",
    "        print(\"p-value:\", p_value, '**')\n",
    "    elif p_value < 0.05:\n",
    "        print(\"p-value:\", p_value, '*')\n",
    "    else:\n",
    "        print(\"p-value:\", p_value, 'n.s.', end='\\n\\n')\n",
    "    print(anova, end='\\n\\n')\n",
    "\n",
    "    oneway_anova = pd.concat([oneway_anova, pd.DataFrame(anova)])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>df</th>\n",
       "      <th>sum_sq</th>\n",
       "      <th>mean_sq</th>\n",
       "      <th>F</th>\n",
       "      <th>PR(&gt;F)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>80.456380</td>\n",
       "      <td>40.228190</td>\n",
       "      <td>488.965619</td>\n",
       "      <td>1.638776e-207</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>1480.402763</td>\n",
       "      <td>0.082272</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>8.396187</td>\n",
       "      <td>4.198093</td>\n",
       "      <td>151.158429</td>\n",
       "      <td>7.908566e-66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>499.743836</td>\n",
       "      <td>0.027773</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>359.545569</td>\n",
       "      <td>179.772785</td>\n",
       "      <td>813.144764</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>3978.174162</td>\n",
       "      <td>0.221083</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>14.837919</td>\n",
       "      <td>7.418960</td>\n",
       "      <td>59.522585</td>\n",
       "      <td>1.717138e-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>2242.791686</td>\n",
       "      <td>0.124641</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>394.419385</td>\n",
       "      <td>197.209693</td>\n",
       "      <td>1452.892410</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>2442.432201</td>\n",
       "      <td>0.135736</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>173.311131</td>\n",
       "      <td>86.655565</td>\n",
       "      <td>585.027510</td>\n",
       "      <td>6.995891e-247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>2665.310973</td>\n",
       "      <td>0.148122</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>314.870025</td>\n",
       "      <td>157.435013</td>\n",
       "      <td>701.828744</td>\n",
       "      <td>3.189563e-294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>4036.434303</td>\n",
       "      <td>0.224321</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>300.077705</td>\n",
       "      <td>150.038853</td>\n",
       "      <td>641.424102</td>\n",
       "      <td>8.220959e-270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>4209.070264</td>\n",
       "      <td>0.233915</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>246.140703</td>\n",
       "      <td>123.070352</td>\n",
       "      <td>712.758236</td>\n",
       "      <td>1.267850e-298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>3106.983262</td>\n",
       "      <td>0.172668</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(language)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>19.906690</td>\n",
       "      <td>9.953345</td>\n",
       "      <td>74.819217</td>\n",
       "      <td>4.373119e-33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>17994.0</td>\n",
       "      <td>2393.776591</td>\n",
       "      <td>0.133032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  df       sum_sq     mean_sq            F         PR(>F)\n",
       "C(language)      2.0    80.456380   40.228190   488.965619  1.638776e-207\n",
       "Residual     17994.0  1480.402763    0.082272          NaN            NaN\n",
       "C(language)      2.0     8.396187    4.198093   151.158429   7.908566e-66\n",
       "Residual     17994.0   499.743836    0.027773          NaN            NaN\n",
       "C(language)      2.0   359.545569  179.772785   813.144764   0.000000e+00\n",
       "Residual     17994.0  3978.174162    0.221083          NaN            NaN\n",
       "C(language)      2.0    14.837919    7.418960    59.522585   1.717138e-26\n",
       "Residual     17994.0  2242.791686    0.124641          NaN            NaN\n",
       "C(language)      2.0   394.419385  197.209693  1452.892410   0.000000e+00\n",
       "Residual     17994.0  2442.432201    0.135736          NaN            NaN\n",
       "C(language)      2.0   173.311131   86.655565   585.027510  6.995891e-247\n",
       "Residual     17994.0  2665.310973    0.148122          NaN            NaN\n",
       "C(language)      2.0   314.870025  157.435013   701.828744  3.189563e-294\n",
       "Residual     17994.0  4036.434303    0.224321          NaN            NaN\n",
       "C(language)      2.0   300.077705  150.038853   641.424102  8.220959e-270\n",
       "Residual     17994.0  4209.070264    0.233915          NaN            NaN\n",
       "C(language)      2.0   246.140703  123.070352   712.758236  1.267850e-298\n",
       "Residual     17994.0  3106.983262    0.172668          NaN            NaN\n",
       "C(language)      2.0    19.906690    9.953345    74.819217   4.373119e-33\n",
       "Residual     17994.0  2393.776591    0.133032          NaN            NaN"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "oneway_anova"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "=== Tukey HSD for RQ1_Russia (p = 0.0000) ===\n",
      "   Multiple Comparison of Means - Tukey HSD, FWER=0.05    \n",
      "==========================================================\n",
      "  group1      group2   meandiff p-adj lower  upper  reject\n",
      "----------------------------------------------------------\n",
      "   English  Simplified   0.1148   0.0 0.1026 0.1269   True\n",
      "   English Traditional   0.1493   0.0 0.1372 0.1614   True\n",
      "Simplified Traditional   0.0345   0.0 0.0211  0.048   True\n",
      "----------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_China (p = 0.0000) ===\n",
      "    Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "============================================================\n",
      "  group1      group2   meandiff p-adj  lower   upper  reject\n",
      "------------------------------------------------------------\n",
      "   English  Simplified   0.0519   0.0  0.0448  0.0589   True\n",
      "   English Traditional   0.0259   0.0  0.0188  0.0329   True\n",
      "Simplified Traditional   -0.026   0.0 -0.0338 -0.0182   True\n",
      "------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_HK (p = 0.0000) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified  -0.2685    0.0 -0.2883 -0.2486   True\n",
      "   English Traditional  -0.2986    0.0 -0.3185 -0.2788   True\n",
      "Simplified Traditional  -0.0302 0.0038 -0.0522 -0.0081   True\n",
      "-------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_UN (p = 0.0000) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified  -0.0049 0.7224 -0.0198    0.01  False\n",
      "   English Traditional  -0.0658    0.0 -0.0807 -0.0509   True\n",
      "Simplified Traditional  -0.0609    0.0 -0.0775 -0.0444   True\n",
      "-------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_USA (p = 0.0000) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified  -0.3072    0.0 -0.3227 -0.2916   True\n",
      "   English Traditional   -0.288    0.0 -0.3035 -0.2724   True\n",
      "Simplified Traditional   0.0192 0.0249  0.0019  0.0365   True\n",
      "-------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_Taiwan (p = 0.0000) ===\n",
      "    Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "============================================================\n",
      "  group1      group2   meandiff p-adj  lower   upper  reject\n",
      "------------------------------------------------------------\n",
      "   English  Simplified  -0.2293   0.0 -0.2456  -0.213   True\n",
      "   English Traditional  -0.1448   0.0  -0.161 -0.1285   True\n",
      "Simplified Traditional   0.0845   0.0  0.0665  0.1026   True\n",
      "------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Xi_Jinping (p = 0.0000) ===\n",
      "    Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "============================================================\n",
      "  group1      group2   meandiff p-adj  lower   upper  reject\n",
      "------------------------------------------------------------\n",
      "   English  Simplified  -0.1835   0.0 -0.2035 -0.1635   True\n",
      "   English Traditional  -0.3124   0.0 -0.3325 -0.2924   True\n",
      "Simplified Traditional  -0.1289   0.0 -0.1511 -0.1067   True\n",
      "------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Joe_Biden (p = 0.0000) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified  -0.2765    0.0  -0.297 -0.2561   True\n",
      "   English Traditional  -0.2404    0.0 -0.2608 -0.2199   True\n",
      "Simplified Traditional   0.0362 0.0005  0.0135  0.0589   True\n",
      "-------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Donald_Trump (p = 0.0000) ===\n",
      "    Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "============================================================\n",
      "  group1      group2   meandiff p-adj  lower   upper  reject\n",
      "------------------------------------------------------------\n",
      "   English  Simplified  -0.2563   0.0 -0.2739 -0.2388   True\n",
      "   English Traditional   -0.209   0.0 -0.2266 -0.1914   True\n",
      "Simplified Traditional   0.0473   0.0  0.0278  0.0668   True\n",
      "------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Vladimir_Putin (p = 0.0000) ===\n",
      "    Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "============================================================\n",
      "  group1      group2   meandiff p-adj  lower   upper  reject\n",
      "------------------------------------------------------------\n",
      "   English  Simplified     0.08   0.0  0.0646  0.0954   True\n",
      "   English Traditional   0.0384   0.0   0.023  0.0538   True\n",
      "Simplified Traditional  -0.0417   0.0 -0.0588 -0.0246   True\n",
      "------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "from statsmodels.stats.anova import anova_lm\n",
    "from statsmodels.stats.multicomp import pairwise_tukeyhsd\n",
    "import pandas as pd\n",
    "\n",
    "anova_results = []\n",
    "tukey_results = []\n",
    "\n",
    "for fav_target in df_long['fav_target'].unique():\n",
    "    sub_df = df_long[df_long['fav_target'] == fav_target].dropna()\n",
    "    \n",
    "    model = ols('favorability ~ C(language)', data=sub_df).fit()\n",
    "    anova = anova_lm(model)\n",
    "    p = anova['PR(>F)'][0]\n",
    "\n",
    "    # Save ANOVA in format (a)\n",
    "    anova_results.append({\n",
    "        'fav_target': fav_target,\n",
    "        'F': anova['F'][0],\n",
    "        'p_value': anova['PR(>F)'][0],\n",
    "        'df_model': int(anova['df'][0]),\n",
    "        'df_resid': int(anova['df'][1])\n",
    "    })\n",
    "\n",
    "    if p < 0.05:\n",
    "        print(f\"\\n=== Tukey HSD for {fav_target} (p = {p:.4f}) ===\")\n",
    "        tukey = pairwise_tukeyhsd(sub_df['favorability'], sub_df['language'], alpha=0.05)\n",
    "        print(tukey)\n",
    "\n",
    "        # Save Tukey HSD\n",
    "        tukey_data = tukey.summary().data[1:]  # exclude header\n",
    "        tukey_cols = tukey.summary().data[0]\n",
    "        tukey_df = pd.DataFrame(tukey_data, columns=tukey_cols)\n",
    "        tukey_df[['meandiff', 'p-adj', 'lower', 'upper']] = tukey_df[['meandiff', 'p-adj', 'lower', 'upper']].astype(float)\n",
    "        tukey_df['fav_target'] = fav_target\n",
    "        tukey_results.append(tukey_df)\n",
    "    else:\n",
    "        print(f\"{fav_target}: no significant difference between languages (p = {p:.4f})\")\n",
    "\n",
    "\n",
    "# Export results\n",
    "anova_df = pd.DataFrame(anova_results)\n",
    "tukey_df = pd.concat(tukey_results, ignore_index=True)\n",
    "\n",
    "anova_df.to_csv(\"fig_2c_openai_anova_results.csv\", index=False)\n",
    "tukey_df.to_csv(\"fig_2c_openai_tukey_results.csv\", index=False)\n",
    "\n",
    "# print(\"Results saved to 'anova_by_language.csv' and 'tukey_by_language.csv'\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DeepSeek"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "E = pd.read_csv(\"results/deepseek_chat_res_HKVPN_eng.csv\")\n",
    "S = pd.read_csv(\"results/deepseek_chat_res_HKVPN_simp_chinese.csv\")\n",
    "T = pd.read_csv(\"results/deepseek_chat_res_HKVPN_trad_chinese.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "E_mu  = E.mean()\n",
    "S_mu  = S.mean()\n",
    "T_mu  = T.mean()\n",
    "\n",
    "E_sg  = E.sem()\n",
    "S_sg  = S.sem()\n",
    "T_sg  = T.sem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "countries = [\"Taiwan\", \"USA\", \"UN\", \"HK\", \"China\", \"Russia\",][::-1]\n",
    "qs = []\n",
    "qs += [\"RQ1_{}\".format(i) for i in countries]\n",
    "qs += [q for q in E_mu.index if \"RQ4\" in q][:-2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RQ1_Russia</th>\n",
       "      <th>RQ1_China</th>\n",
       "      <th>RQ1_HK</th>\n",
       "      <th>RQ1_UN</th>\n",
       "      <th>RQ1_USA</th>\n",
       "      <th>RQ1_Taiwan</th>\n",
       "      <th>RQ4_Xi_Jinping</th>\n",
       "      <th>RQ4_Joe_Biden</th>\n",
       "      <th>RQ4_Donald_Trump</th>\n",
       "      <th>RQ4_Vladimir_Putin</th>\n",
       "      <th>language</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>English</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   RQ1_Russia  RQ1_China  RQ1_HK  RQ1_UN  RQ1_USA  RQ1_Taiwan  RQ4_Xi_Jinping  \\\n",
       "0           2          3       3       4        4           4               3   \n",
       "1           2          3       3       4        4           4               3   \n",
       "2           2          3       3       4        4           4               3   \n",
       "3           2          3       3       4        4           4               3   \n",
       "4           2          3       3       4        4           4               3   \n",
       "\n",
       "   RQ4_Joe_Biden  RQ4_Donald_Trump  RQ4_Vladimir_Putin language  \n",
       "0              4                 2                   2  English  \n",
       "1              4                 2                   2  English  \n",
       "2              4                 2                   2  English  \n",
       "3              4                 2                   2  English  \n",
       "4              4                 2                   2  English  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "E['language'] = 'English'\n",
    "T['language'] = 'Traditional'\n",
    "S['language'] = 'Simplified'\n",
    "df = pd.concat([E[qs+['language']], T[qs+['language']], S[qs+['language']]], ignore_index=True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>language</th>\n",
       "      <th>fav_target</th>\n",
       "      <th>favorability</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>English</td>\n",
       "      <td>RQ1_Russia</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  language  fav_target  favorability\n",
       "0  English  RQ1_Russia             2\n",
       "1  English  RQ1_Russia             2\n",
       "2  English  RQ1_Russia             2\n",
       "3  English  RQ1_Russia             2\n",
       "4  English  RQ1_Russia             2"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_long = pd.melt(df, \n",
    "                  id_vars='language', \n",
    "                  var_name='fav_target', \n",
    "                  value_name='favorability')\n",
    "df_long.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                sum_sq      df             F        PR(>F)\n",
      "C(language)                   8.077149     2.0    153.054239  2.042315e-65\n",
      "C(fav_target)              3611.780505     9.0  15208.839725  0.000000e+00\n",
      "C(language):C(fav_target)   332.691756    18.0    700.465544  0.000000e+00\n",
      "Residual                    145.389939  5510.0           NaN           NaN\n"
     ]
    }
   ],
   "source": [
    "model = ols('favorability ~ C(language) * C(fav_target)', data=df_long).fit()\n",
    "anova_table = sm.stats.anova_lm(model, typ=2)\n",
    "print(anova_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- RQ1_Russia ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df        sum_sq       mean_sq          F         PR(>F)\n",
      "C(language)    2.0  1.412791e-26  7.063954e-27  401.21149  3.001397e-108\n",
      "Residual     551.0  9.701214e-27  1.760656e-29        NaN            NaN\n",
      "\n",
      "--- RQ1_China ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df        sum_sq       mean_sq           F        PR(>F)\n",
      "C(language)    2.0  1.008521e-25  5.042605e-26  243.033299  2.150474e-76\n",
      "Residual     551.0  1.143249e-25  2.074862e-28         NaN           NaN\n",
      "\n",
      "--- RQ1_HK ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df    sum_sq   mean_sq         F   PR(>F)\n",
      "C(language)    2.0  0.236056  0.118028  9.742097  0.00007\n",
      "Residual     551.0  6.675497  0.012115       NaN      NaN\n",
      "\n",
      "--- RQ1_UN ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df     sum_sq    mean_sq           F        PR(>F)\n",
      "C(language)    2.0  33.023870  16.511935  145.180213  2.263092e-51\n",
      "Residual     551.0  62.667466   0.113734         NaN           NaN\n",
      "\n",
      "--- RQ1_USA ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df        sum_sq       mean_sq          F         PR(>F)\n",
      "C(language)    2.0  5.651163e-26  2.825582e-26  401.21149  3.001397e-108\n",
      "Residual     551.0  3.880486e-26  7.042624e-29        NaN            NaN\n",
      "\n",
      "--- RQ1_Taiwan ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df        sum_sq       mean_sq          F         PR(>F)\n",
      "C(language)    2.0  5.651163e-26  2.825582e-26  401.21149  3.001397e-108\n",
      "Residual     551.0  3.880486e-26  7.042624e-29        NaN            NaN\n",
      "\n",
      "--- RQ4_Xi_Jinping ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df      sum_sq    mean_sq            F         PR(>F)\n",
      "C(language)    2.0  197.897704  98.948852  1049.618691  1.181163e-188\n",
      "Residual     551.0   51.943451   0.094271          NaN            NaN\n",
      "\n",
      "--- RQ4_Joe_Biden ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df      sum_sq    mean_sq             F  PR(>F)\n",
      "C(language)    2.0  107.854509  53.927254  14943.245572     0.0\n",
      "Residual     551.0    1.988451   0.003609           NaN     NaN\n",
      "\n",
      "--- RQ4_Donald_Trump ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df        sum_sq       mean_sq          F         PR(>F)\n",
      "C(language)    2.0  1.412791e-26  7.063954e-27  401.21149  3.001397e-108\n",
      "Residual     551.0  9.701214e-27  1.760656e-29        NaN            NaN\n",
      "\n",
      "--- RQ4_Vladimir_Putin ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "                df     sum_sq   mean_sq          F        PR(>F)\n",
      "C(language)    2.0   1.756767  0.878384  21.885045  7.146327e-10\n",
      "Residual     551.0  22.115074  0.040136        NaN           NaN\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "from statsmodels.stats.anova import anova_lm\n",
    "\n",
    "oneway_anova = pd.DataFrame()\n",
    "\n",
    "for fav_target in df_long['fav_target'].unique():\n",
    "    sub_df = df_long[df_long['fav_target'] == fav_target]\n",
    "    model = ols('favorability ~ C(language)', data=sub_df).fit()\n",
    "    anova = anova_lm(model)\n",
    "\n",
    "    # Extract the p-value\n",
    "    p_value = anova_table['PR(>F)'][0]\n",
    "    \n",
    "    \n",
    "    print(f\"--- {fav_target} ---\")\n",
    "    if p_value < 0.001:\n",
    "        print(\"p-value:\", p_value, '***')\n",
    "    elif p_value < 0.01:\n",
    "        print(\"p-value:\", p_value, '**')\n",
    "    elif p_value < 0.05:\n",
    "        print(\"p-value:\", p_value, '*')\n",
    "    else:\n",
    "        print(\"p-value:\", p_value, 'n.s.', end='\\n\\n')\n",
    "    print(anova, end='\\n\\n')\n",
    "\n",
    "    oneway_anova = pd.concat([oneway_anova, pd.DataFrame(anova)])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "=== Tukey HSD for RQ1_Russia (p = 0.0000) ===\n",
      "  Multiple Comparison of Means - Tukey HSD, FWER=0.05   \n",
      "========================================================\n",
      "  group1      group2   meandiff p-adj lower upper reject\n",
      "--------------------------------------------------------\n",
      "   English  Simplified      0.0   nan   0.0   0.0  False\n",
      "   English Traditional      0.0   nan   0.0   0.0  False\n",
      "Simplified Traditional      0.0   nan   0.0   0.0  False\n",
      "--------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_China (p = 0.0000) ===\n",
      "  Multiple Comparison of Means - Tukey HSD, FWER=0.05   \n",
      "========================================================\n",
      "  group1      group2   meandiff p-adj lower upper reject\n",
      "--------------------------------------------------------\n",
      "   English  Simplified      0.0   nan   0.0   0.0  False\n",
      "   English Traditional      0.0   nan   0.0   0.0  False\n",
      "Simplified Traditional      0.0   nan   0.0   0.0  False\n",
      "--------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_HK (p = 0.0001) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified  -0.0464 0.0003 -0.0742 -0.0185   True\n",
      "   English Traditional  -0.0464 0.0003 -0.0742 -0.0186   True\n",
      "Simplified Traditional      0.0    1.0 -0.0258  0.0258  False\n",
      "-------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_UN (p = 0.0000) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified  -0.0534 0.3071 -0.1388  0.0321  False\n",
      "   English Traditional  -0.5352    0.0 -0.6204 -0.4501   True\n",
      "Simplified Traditional  -0.4819    0.0 -0.5608 -0.4029   True\n",
      "-------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_USA (p = 0.0000) ===\n",
      "  Multiple Comparison of Means - Tukey HSD, FWER=0.05   \n",
      "========================================================\n",
      "  group1      group2   meandiff p-adj lower upper reject\n",
      "--------------------------------------------------------\n",
      "   English  Simplified      0.0   nan   0.0   0.0  False\n",
      "   English Traditional      0.0   nan   0.0   0.0  False\n",
      "Simplified Traditional      0.0   nan   0.0   0.0  False\n",
      "--------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ1_Taiwan (p = 0.0000) ===\n",
      "  Multiple Comparison of Means - Tukey HSD, FWER=0.05   \n",
      "========================================================\n",
      "  group1      group2   meandiff p-adj lower upper reject\n",
      "--------------------------------------------------------\n",
      "   English  Simplified      0.0   nan   0.0   0.0  False\n",
      "   English Traditional      0.0   nan   0.0   0.0  False\n",
      "Simplified Traditional      0.0   nan   0.0   0.0  False\n",
      "--------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Xi_Jinping (p = 0.0000) ===\n",
      "    Multiple Comparison of Means - Tukey HSD, FWER=0.05    \n",
      "===========================================================\n",
      "  group1      group2   meandiff p-adj  lower  upper  reject\n",
      "-----------------------------------------------------------\n",
      "   English  Simplified   1.5084   0.0  1.4306 1.5862   True\n",
      "   English Traditional   0.9885   0.0  0.9109  1.066   True\n",
      "Simplified Traditional  -0.5199   0.0 -0.5918 -0.448   True\n",
      "-----------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Joe_Biden (p = 0.0000) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified  -0.9934    0.0 -1.0086 -0.9782   True\n",
      "   English Traditional  -0.9885    0.0 -1.0036 -0.9733   True\n",
      "Simplified Traditional   0.0049 0.6889 -0.0091   0.019  False\n",
      "-------------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Donald_Trump (p = 0.0000) ===\n",
      "  Multiple Comparison of Means - Tukey HSD, FWER=0.05   \n",
      "========================================================\n",
      "  group1      group2   meandiff p-adj lower upper reject\n",
      "--------------------------------------------------------\n",
      "   English  Simplified      0.0   nan   0.0   0.0  False\n",
      "   English Traditional      0.0   nan   0.0   0.0  False\n",
      "Simplified Traditional      0.0   nan   0.0   0.0  False\n",
      "--------------------------------------------------------\n",
      "\n",
      "=== Tukey HSD for RQ4_Vladimir_Putin (p = 0.0000) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "  group1      group2   meandiff p-adj   lower   upper  reject\n",
      "-------------------------------------------------------------\n",
      "   English  Simplified    -0.12    0.0 -0.1708 -0.0692   True\n",
      "   English Traditional  -0.0049 0.9716 -0.0555  0.0457  False\n",
      "Simplified Traditional   0.1151    0.0  0.0682   0.162   True\n",
      "-------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "from statsmodels.stats.anova import anova_lm\n",
    "from statsmodels.stats.multicomp import pairwise_tukeyhsd\n",
    "import pandas as pd\n",
    "\n",
    "anova_results = []\n",
    "tukey_results = []\n",
    "\n",
    "for fav_target in df_long['fav_target'].unique():\n",
    "    sub_df = df_long[df_long['fav_target'] == fav_target].dropna()\n",
    "    \n",
    "    model = ols('favorability ~ C(language)', data=sub_df).fit()\n",
    "    anova = anova_lm(model)\n",
    "    p = anova['PR(>F)'][0]\n",
    "\n",
    "    # Save ANOVA in format (a)\n",
    "    anova_results.append({\n",
    "        'fav_target': fav_target,\n",
    "        'F': anova['F'][0],\n",
    "        'p_value': anova['PR(>F)'][0],\n",
    "        'df_model': int(anova['df'][0]),\n",
    "        'df_resid': int(anova['df'][1])\n",
    "    })\n",
    "\n",
    "    if p < 0.05:\n",
    "        print(f\"\\n=== Tukey HSD for {fav_target} (p = {p:.4f}) ===\")\n",
    "        tukey = pairwise_tukeyhsd(sub_df['favorability'], sub_df['language'], alpha=0.05)\n",
    "        print(tukey)\n",
    "\n",
    "        # Save Tukey HSD\n",
    "        tukey_data = tukey.summary().data[1:]  # exclude header\n",
    "        tukey_cols = tukey.summary().data[0]\n",
    "        tukey_df = pd.DataFrame(tukey_data, columns=tukey_cols)\n",
    "        tukey_df[['meandiff', 'p-adj', 'lower', 'upper']] = tukey_df[['meandiff', 'p-adj', 'lower', 'upper']].astype(float)\n",
    "        tukey_df['fav_target'] = fav_target\n",
    "        tukey_results.append(tukey_df)\n",
    "    else:\n",
    "        print(f\"{fav_target}: no significant difference between languages (p = {p:.4f})\")\n",
    "\n",
    "# Export results\n",
    "anova_df = pd.DataFrame(anova_results)  # changed from concat to DataFrame\n",
    "tukey_df = pd.concat(tukey_results, ignore_index=True)\n",
    "\n",
    "anova_df.to_csv(\"fig_2b_deepseek_anova_results.csv\", index=False)\n",
    "tukey_df.to_csv(\"fig_2b_deepseek_tukey_results.csv\", index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Misinfomation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "results/openai_gpt4o_res_misinfo_0625.json\n",
      "(4993, 13)\n"
     ]
    }
   ],
   "source": [
    "openai_gpt4o_res = []\n",
    "for file_name in glob('results/openai_gpt4o_res_misinfo*.json'):\n",
    "    print(file_name)\n",
    "    with open(file_name, \"r\", encoding=\"utf-8\") as json_file:\n",
    "        openai_gpt4o_res.extend(json.load(json_file))\n",
    "\n",
    "openai_gpt4o_res = pd.DataFrame(openai_gpt4o_res)\n",
    "print(openai_gpt4o_res.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "results/deepseek_chat_res_misinfo.json\n",
      "(501, 13)\n"
     ]
    }
   ],
   "source": [
    "deepseek_chat_res = []\n",
    "for file_name in glob('results/deepseek_chat_res_misinfo*.json'):\n",
    "    print(file_name)\n",
    "    with open(file_name, \"r\", encoding=\"utf-8\") as json_file:\n",
    "        deepseek_chat_res.extend(json.load(json_file))\n",
    "\n",
    "deepseek_chat_res = pd.DataFrame(deepseek_chat_res)\n",
    "print(deepseek_chat_res.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['RQ_fav_Macron',\n",
       " 'RQ_fav_Zelenskyy',\n",
       " 'RQ_fav_Putin',\n",
       " 'RQ_fav_Trump',\n",
       " 'RQ_fav_Biden',\n",
       " 'RQ_fav_Xi',\n",
       " 'RQ_statement_Macron',\n",
       " 'RQ_statement_Zelenskyy',\n",
       " 'RQ_statement_Putin1',\n",
       " 'RQ_statement_Putin2',\n",
       " 'RQ_statement_Trump',\n",
       " 'RQ_statement_Biden',\n",
       " 'RQ_statement_Xi']"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RQs = list(deepseek_chat_res.columns)\n",
    "RQs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_average_scores(responses_df, RQs):\n",
    "    # Filter only existing columns\n",
    "    existing_RQs = [rq for rq in RQs if rq in responses_df.columns]\n",
    "    \n",
    "    if not existing_RQs:\n",
    "        return {}  # Return empty dict if no valid RQs are found\n",
    "\n",
    "    responses = responses_df[existing_RQs]  # Select only available columns\n",
    "    values = np.array(responses, dtype=np.float64)  # Ensure proper dtype for calculations\n",
    "    avg = np.nanmean(values, axis=0)  # Compute mean while ignoring NaNs\n",
    "    mu = {k: v for k, v in zip(existing_RQs, avg)}\n",
    "    \n",
    "    \n",
    "    std_dev = np.nanstd(data, ddof=1, axis=0)  # ddof=1 for sample standard deviation\n",
    "    sig = {k: v for k, v in zip(existing_RQs, std_dev)}\n",
    "    \n",
    "    return [mu, sig]   # Create dictionary of averages\n",
    "def extract_rq_values(RQn, avg_dict):\n",
    "    # Find relevant keys in selected_RQs that match RQn pattern\n",
    "    RQn_keys = [q for q in avg_dict.keys() if q.startswith(RQn + '_')]\n",
    "    \n",
    "    # Get corresponding values, only if key exists\n",
    "    RQn_values = {q: avg_dict[q] for q in RQn_keys if q in avg_dict}\n",
    "    \n",
    "    return RQn_values\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "DS_mu = deepseek_chat_res.mean()\n",
    "OA_mu = openai_gpt4o_res.mean()\n",
    "# MT_mu = mistral_res.mean()\n",
    "# GR_mu = xai_grok_res.mean()\n",
    "\n",
    "DS_sg = deepseek_chat_res.sem()\n",
    "OA_sg = openai_gpt4o_res.sem()\n",
    "# MT_sg = mistral_res.sem()\n",
    "# GR_sg = xai_grok_res.sem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "deepseek_chat_res['model'] = 'DeepSeek Chat'\n",
    "openai_gpt4o_res['model'] = 'OpenAI GPT-4o'\n",
    "# mistral_res['model'] = 'Mistral AI'\n",
    "# xai_grok_res['model'] = 'xAI Grok'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine all responses\n",
    "df = pd.concat([\n",
    "    deepseek_chat_res,\n",
    "    openai_gpt4o_res,\n",
    "    # mistral_res,\n",
    "    # xai_grok_res\n",
    "], ignore_index=True)\n",
    "\n",
    "# countries = [\"Japan\", \"Taiwan\", \"USA\", \"UN\", \"HK\", \"China\", \"Russia\",][::-1]\n",
    "# qs = [\"RQ1_{}\".format(i) for i in countries]\n",
    "qs = RQs\n",
    "\n",
    "# Reshape to long format\n",
    "df_long = pd.melt(df, \n",
    "                  id_vars='model', \n",
    "                  value_vars=qs, \n",
    "                  var_name='country', \n",
    "                  value_name='favorability')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- RQ_fav_Macron ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df     sum_sq   mean_sq         F    PR(>F)\n",
      "C(model)     1.0   0.015383  0.015383  2.928167  0.087103\n",
      "Residual  5488.0  28.831429  0.005254       NaN       NaN\n",
      "\n",
      "--- RQ_fav_Zelenskyy ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df      sum_sq   mean_sq          F    PR(>F)\n",
      "C(model)     1.0    0.522427  0.522427  17.559783  0.000028\n",
      "Residual  5488.0  163.275205  0.029751        NaN       NaN\n",
      "\n",
      "--- RQ_fav_Putin ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df       sum_sq    mean_sq           F        PR(>F)\n",
      "C(model)     1.0    81.127597  81.127597  365.841702  5.337933e-79\n",
      "Residual  5488.0  1216.996993   0.221756         NaN           NaN\n",
      "\n",
      "--- RQ_fav_Trump ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df      sum_sq     mean_sq              F  PR(>F)\n",
      "C(model)     1.0  455.828033  455.828033  119133.293355     0.0\n",
      "Residual  5488.0   20.998196    0.003826            NaN     NaN\n",
      "\n",
      "--- RQ_fav_Biden ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df        sum_sq       mean_sq          F        PR(>F)\n",
      "C(model)     1.0  3.022607e-25  3.022607e-25  73.071246  1.601970e-17\n",
      "Residual  5488.0  2.270123e-23  4.136521e-27        NaN           NaN\n",
      "\n",
      "--- RQ_fav_Xi ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df      sum_sq     mean_sq             F  PR(>F)\n",
      "C(model)     1.0  439.368291  439.368291  28781.281287     0.0\n",
      "Residual  5488.0   83.778521    0.015266           NaN     NaN\n",
      "\n",
      "--- RQ_statement_Macron ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df       sum_sq      mean_sq           F  PR(>F)\n",
      "C(model)     1.0  1368.940277  1368.940277  7336.39432     0.0\n",
      "Residual  5488.0  1024.037683     0.186596         NaN     NaN\n",
      "\n",
      "--- RQ_statement_Zelenskyy ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df      sum_sq     mean_sq             F  PR(>F)\n",
      "C(model)     1.0  455.097832  455.097832  2.498078e+06     0.0\n",
      "Residual  5488.0    0.999800    0.000182           NaN     NaN\n",
      "\n",
      "--- RQ_statement_Putin1 ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df     sum_sq   mean_sq         F    PR(>F)\n",
      "C(model)     1.0   0.172106  0.172106  9.930355  0.001634\n",
      "Residual  5488.0  95.114051  0.017331       NaN       NaN\n",
      "\n",
      "--- RQ_statement_Putin2 ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df      sum_sq     mean_sq            F         PR(>F)\n",
      "C(model)     1.0  215.849369  215.849369  1365.558376  3.807795e-267\n",
      "Residual  5488.0  867.470303    0.158067          NaN            NaN\n",
      "\n",
      "--- RQ_statement_Trump ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df      sum_sq     mean_sq             F  PR(>F)\n",
      "C(model)     1.0  443.877228  443.877228  68113.602115     0.0\n",
      "Residual  5488.0   35.763756    0.006517           NaN     NaN\n",
      "\n",
      "--- RQ_statement_Biden ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df        sum_sq       mean_sq          F    PR(>F)\n",
      "C(model)     1.0  8.047170e-27  8.047170e-27  22.762111  0.000002\n",
      "Residual  5488.0  1.940192e-24  3.535336e-28        NaN       NaN\n",
      "\n",
      "--- RQ_statement_Xi ---\n",
      "p-value: 2.0423149562824754e-65 ***\n",
      "              df      sum_sq    mean_sq          F        PR(>F)\n",
      "C(model)     1.0   11.214383  11.214383  93.233498  6.932743e-22\n",
      "Residual  5488.0  660.111846   0.120283        NaN           NaN\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "from statsmodels.stats.anova import anova_lm\n",
    "\n",
    "oneway_anova = pd.DataFrame()\n",
    "\n",
    "for country in df_long['country'].unique():\n",
    "    sub_df = df_long[df_long['country'] == country]\n",
    "    model = ols('favorability ~ C(model)', data=sub_df).fit()\n",
    "    anova = anova_lm(model)\n",
    "\n",
    "    # Extract the p-value\n",
    "    p_value = anova_table['PR(>F)'][0]\n",
    "    \n",
    "    \n",
    "    print(f\"--- {country} ---\")\n",
    "    if p_value < 0.001:\n",
    "        print(\"p-value:\", p_value, '***')\n",
    "    elif p_value < 0.01:\n",
    "        print(\"p-value:\", p_value, '**')\n",
    "    elif p_value < 0.05:\n",
    "        print(\"p-value:\", p_value, '*')\n",
    "    else:\n",
    "        print(\"p-value:\", p_value, 'n.s.', end='\\n\\n')\n",
    "    print(anova, end='\\n\\n')\n",
    "\n",
    "    oneway_anova = pd.concat([oneway_anova, pd.DataFrame(anova)])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RQ_fav_Macron: no significant difference between models (p = 0.0871)\n",
      "\n",
      "--- RQ_fav_Zelenskyy ---\n",
      "p-value: 2.828e-05 ***\n",
      "              df      sum_sq   mean_sq          F    PR(>F)\n",
      "C(model)     1.0    0.522427  0.522427  17.559783  0.000028\n",
      "Residual  5488.0  163.275205  0.029751        NaN       NaN\n",
      "\n",
      "=== Tukey HSD for RQ_fav_Zelenskyy (balanced n = 501) ===\n",
      "       Multiple Comparison of Means - Tukey HSD, FWER=0.05       \n",
      "=================================================================\n",
      "    group1        group2    meandiff p-adj  lower   upper  reject\n",
      "-----------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o  -0.0379   0.0 -0.0547 -0.0212   True\n",
      "-----------------------------------------------------------------\n",
      "\n",
      "--- RQ_fav_Putin ---\n",
      "p-value: 5.338e-79 ***\n",
      "              df       sum_sq    mean_sq           F        PR(>F)\n",
      "C(model)     1.0    81.127597  81.127597  365.841702  5.337933e-79\n",
      "Residual  5488.0  1216.996993   0.221756         NaN           NaN\n",
      "\n",
      "=== Tukey HSD for RQ_fav_Putin (balanced n = 501) ===\n",
      "       Multiple Comparison of Means - Tukey HSD, FWER=0.05       \n",
      "=================================================================\n",
      "    group1        group2    meandiff p-adj  lower   upper  reject\n",
      "-----------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o  -0.4192   0.0 -0.4625 -0.3759   True\n",
      "-----------------------------------------------------------------\n",
      "\n",
      "--- RQ_fav_Trump ---\n",
      "p-value: 0 ***\n",
      "              df      sum_sq     mean_sq              F  PR(>F)\n",
      "C(model)     1.0  455.828033  455.828033  119133.293355     0.0\n",
      "Residual  5488.0   20.998196    0.003826            NaN     NaN\n",
      "\n",
      "=== Tukey HSD for RQ_fav_Trump (balanced n = 501) ===\n",
      "       Multiple Comparison of Means - Tukey HSD, FWER=0.05       \n",
      "=================================================================\n",
      "    group1        group2    meandiff p-adj  lower   upper  reject\n",
      "-----------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o     -1.0   0.0 -1.0055 -0.9945   True\n",
      "-----------------------------------------------------------------\n",
      "\n",
      "--- RQ_fav_Biden ---\n",
      "p-value: 1.602e-17 ***\n",
      "              df        sum_sq       mean_sq          F        PR(>F)\n",
      "C(model)     1.0  3.022607e-25  3.022607e-25  73.071246  1.601970e-17\n",
      "Residual  5488.0  2.270123e-23  4.136521e-27        NaN           NaN\n",
      "\n",
      "=== Tukey HSD for RQ_fav_Biden (balanced n = 501) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "    group1        group2    meandiff p-adj lower upper reject\n",
      "-------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o      0.0   nan   0.0   0.0  False\n",
      "-------------------------------------------------------------\n",
      "\n",
      "--- RQ_fav_Xi ---\n",
      "p-value: 0 ***\n",
      "              df      sum_sq     mean_sq             F  PR(>F)\n",
      "C(model)     1.0  439.368291  439.368291  28781.281287     0.0\n",
      "Residual  5488.0   83.778521    0.015266           NaN     NaN\n",
      "\n",
      "=== Tukey HSD for RQ_fav_Xi (balanced n = 501) ===\n",
      "       Multiple Comparison of Means - Tukey HSD, FWER=0.05       \n",
      "=================================================================\n",
      "    group1        group2    meandiff p-adj  lower   upper  reject\n",
      "-----------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o    -0.98   0.0 -0.9923 -0.9677   True\n",
      "-----------------------------------------------------------------\n",
      "\n",
      "--- RQ_statement_Macron ---\n",
      "p-value: 0 ***\n",
      "              df       sum_sq      mean_sq           F  PR(>F)\n",
      "C(model)     1.0  1368.940277  1368.940277  7336.39432     0.0\n",
      "Residual  5488.0  1024.037683     0.186596         NaN     NaN\n",
      "\n",
      "=== Tukey HSD for RQ_statement_Macron (balanced n = 501) ===\n",
      "       Multiple Comparison of Means - Tukey HSD, FWER=0.05       \n",
      "=================================================================\n",
      "    group1        group2    meandiff p-adj  lower   upper  reject\n",
      "-----------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o  -1.7385   0.0 -1.7779 -1.6992   True\n",
      "-----------------------------------------------------------------\n",
      "\n",
      "--- RQ_statement_Zelenskyy ---\n",
      "p-value: 0 ***\n",
      "              df      sum_sq     mean_sq             F  PR(>F)\n",
      "C(model)     1.0  455.097832  455.097832  2.498078e+06     0.0\n",
      "Residual  5488.0    0.999800    0.000182           NaN     NaN\n",
      "\n",
      "=== Tukey HSD for RQ_statement_Zelenskyy (balanced n = 501) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "    group1        group2    meandiff p-adj lower upper reject\n",
      "-------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o     -1.0   0.0  -1.0  -1.0   True\n",
      "-------------------------------------------------------------\n",
      "\n",
      "--- RQ_statement_Putin1 ---\n",
      "p-value: 0.001634 ***\n",
      "              df     sum_sq   mean_sq         F    PR(>F)\n",
      "C(model)     1.0   0.172106  0.172106  9.930355  0.001634\n",
      "Residual  5488.0  95.114051  0.017331       NaN       NaN\n",
      "\n",
      "=== Tukey HSD for RQ_statement_Putin1 (balanced n = 501) ===\n",
      "      Multiple Comparison of Means - Tukey HSD, FWER=0.05      \n",
      "===============================================================\n",
      "    group1        group2    meandiff p-adj lower  upper  reject\n",
      "---------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o    0.012 0.014 0.0024 0.0215   True\n",
      "---------------------------------------------------------------\n",
      "\n",
      "--- RQ_statement_Putin2 ---\n",
      "p-value: 3.808e-267 ***\n",
      "              df      sum_sq     mean_sq            F         PR(>F)\n",
      "C(model)     1.0  215.849369  215.849369  1365.558376  3.807795e-267\n",
      "Residual  5488.0  867.470303    0.158067          NaN            NaN\n",
      "\n",
      "=== Tukey HSD for RQ_statement_Putin2 (balanced n = 501) ===\n",
      "       Multiple Comparison of Means - Tukey HSD, FWER=0.05       \n",
      "=================================================================\n",
      "    group1        group2    meandiff p-adj  lower   upper  reject\n",
      "-----------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o  -0.6826   0.0 -0.7275 -0.6378   True\n",
      "-----------------------------------------------------------------\n",
      "\n",
      "--- RQ_statement_Trump ---\n",
      "p-value: 0 ***\n",
      "              df      sum_sq     mean_sq             F  PR(>F)\n",
      "C(model)     1.0  443.877228  443.877228  68113.602115     0.0\n",
      "Residual  5488.0   35.763756    0.006517           NaN     NaN\n",
      "\n",
      "=== Tukey HSD for RQ_statement_Trump (balanced n = 501) ===\n",
      "       Multiple Comparison of Means - Tukey HSD, FWER=0.05       \n",
      "=================================================================\n",
      "    group1        group2    meandiff p-adj  lower   upper  reject\n",
      "-----------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o   -0.988   0.0 -0.9976 -0.9784   True\n",
      "-----------------------------------------------------------------\n",
      "\n",
      "--- RQ_statement_Biden ---\n",
      "p-value: 1.881e-06 ***\n",
      "              df        sum_sq       mean_sq          F    PR(>F)\n",
      "C(model)     1.0  8.047170e-27  8.047170e-27  22.762111  0.000002\n",
      "Residual  5488.0  1.940192e-24  3.535336e-28        NaN       NaN\n",
      "\n",
      "=== Tukey HSD for RQ_statement_Biden (balanced n = 501) ===\n",
      "     Multiple Comparison of Means - Tukey HSD, FWER=0.05     \n",
      "=============================================================\n",
      "    group1        group2    meandiff p-adj lower upper reject\n",
      "-------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o      0.0   nan   0.0   0.0  False\n",
      "-------------------------------------------------------------\n",
      "\n",
      "--- RQ_statement_Xi ---\n",
      "p-value: 6.933e-22 ***\n",
      "              df      sum_sq    mean_sq          F        PR(>F)\n",
      "C(model)     1.0   11.214383  11.214383  93.233498  6.932743e-22\n",
      "Residual  5488.0  660.111846   0.120283        NaN           NaN\n",
      "\n",
      "=== Tukey HSD for RQ_statement_Xi (balanced n = 501) ===\n",
      "      Multiple Comparison of Means - Tukey HSD, FWER=0.05      \n",
      "===============================================================\n",
      "    group1        group2    meandiff p-adj lower  upper  reject\n",
      "---------------------------------------------------------------\n",
      "DeepSeek Chat OpenAI GPT-4o   0.1637   0.0 0.1312 0.1961   True\n",
      "---------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "from statsmodels.stats.anova import anova_lm\n",
    "from statsmodels.stats.multicomp import pairwise_tukeyhsd\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "def significance_stars(p_value):\n",
    "    \"\"\"Return the conventional significance marker for a p-value.\n",
    "\n",
    "    '***' for p < 0.001, '**' for p < 0.01, '*' for p < 0.05, else 'n.s.'.\n",
    "    \"\"\"\n",
    "    if p_value < 0.001:\n",
    "        return '***'\n",
    "    if p_value < 0.01:\n",
    "        return '**'\n",
    "    if p_value < 0.05:\n",
    "        return '*'\n",
    "    return 'n.s.'\n",
    "\n",
    "\n",
    "# Column layout of the Tukey summary table plus the item label added below;\n",
    "# used to build an empty result frame when no item reaches significance.\n",
    "TUKEY_NUMERIC_COLS = ['meandiff', 'p-adj', 'lower', 'upper']\n",
    "TUKEY_COLS = ['group1', 'group2'] + TUKEY_NUMERIC_COLS + ['reject', 'country']\n",
    "\n",
    "anova_results = []\n",
    "tukey_results = []\n",
    "\n",
    "# For each value of df_long['country']: one-way ANOVA of favorability by model,\n",
    "# followed by Tukey HSD on a size-balanced subsample when the ANOVA p < 0.05.\n",
    "for country in df_long['country'].unique():\n",
    "    sub_df = df_long[df_long['country'] == country].dropna()\n",
    "\n",
    "    # Skip if fewer than 2 model groups\n",
    "    if sub_df['model'].nunique() < 2:\n",
    "        print(f\"{country}: Not enough model groups.\")\n",
    "        continue\n",
    "\n",
    "    # ANOVA\n",
    "    model = ols('favorability ~ C(model)', data=sub_df).fit()\n",
    "    anova = anova_lm(model)\n",
    "    # Row 0 is the C(model) term, row 1 the residual; `.iloc` makes the\n",
    "    # positional access explicit (plain `[0]` on a labelled Series is deprecated).\n",
    "    p = anova['PR(>F)'].iloc[0]\n",
    "\n",
    "    # Save ANOVA in format (a)\n",
    "    anova_results.append({\n",
    "        'country': country,\n",
    "        'F': anova['F'].iloc[0],\n",
    "        'p_value': p,\n",
    "        'df_model': int(anova['df'].iloc[0]),\n",
    "        'df_resid': int(anova['df'].iloc[1])\n",
    "    })\n",
    "\n",
    "    if p < 0.05:\n",
    "        print(f\"\\n--- {country} ---\")\n",
    "        # Marker now reflects the actual significance level instead of always '***'.\n",
    "        print(f\"p-value: {p:.4g} {significance_stars(p)}\")\n",
    "        print(anova)\n",
    "\n",
    "        # NOTE(review): items whose favorability is essentially constant can be\n",
    "        # flagged here purely from floating-point noise (sum_sq ~1e-25) while\n",
    "        # Tukey then reports meandiff 0 and p-adj nan - consider filtering\n",
    "        # zero-variance items before testing.\n",
    "\n",
    "        # Downsample to balance group sizes (fixed seed keeps the draw reproducible)\n",
    "        target_n = sub_df['model'].value_counts().min()\n",
    "        balanced_df = (\n",
    "            sub_df.groupby('model', group_keys=False)\n",
    "                  .apply(lambda x: x.sample(n=target_n, random_state=42))\n",
    "                  .reset_index(drop=True)\n",
    "        )\n",
    "\n",
    "        # Tukey HSD\n",
    "        tukey = pairwise_tukeyhsd(endog=balanced_df['favorability'],\n",
    "                                  groups=balanced_df['model'],\n",
    "                                  alpha=0.05)\n",
    "        print(f\"\\n=== Tukey HSD for {country} (balanced n = {target_n}) ===\")\n",
    "        print(tukey)\n",
    "\n",
    "        # Save Tukey HSD: build the summary once; first row is the header.\n",
    "        summary_rows = tukey.summary().data\n",
    "        item_tukey = pd.DataFrame(summary_rows[1:], columns=summary_rows[0])\n",
    "        item_tukey[TUKEY_NUMERIC_COLS] = item_tukey[TUKEY_NUMERIC_COLS].astype(float)\n",
    "        item_tukey['country'] = country\n",
    "        tukey_results.append(item_tukey)\n",
    "    else:\n",
    "        print(f\"{country}: no significant difference between models (p = {p:.4g})\")\n",
    "\n",
    "# Concatenate and save. pd.concat raises on an empty list, so fall back to an\n",
    "# empty frame with the expected columns when no item was significant.\n",
    "anova_df = pd.DataFrame(anova_results)\n",
    "if tukey_results:\n",
    "    tukey_df = pd.concat(tukey_results, ignore_index=True)\n",
    "else:\n",
    "    tukey_df = pd.DataFrame(columns=TUKEY_COLS)\n",
    "\n",
    "anova_df.to_csv(\"misinfo_anova_results.csv\", index=False)\n",
    "tukey_df.to_csv(\"misinfo_tukey_results.csv\", index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
